/*
 * This source file is part of MyGUI. For the latest info, see http://mygui.info/
 * Distributed under the MIT License
 * (See accompanying file COPYING.MIT or copy at http://opensource.org/licenses/MIT)
 */

#include "MyGUI_Precompiled.h"
#include "MyGUI_UString.h"

namespace MyGUI
{

	//--------------------------------------------------------------------------
	void UString::_base_iterator::_seekFwd(size_type c)
	{
		mIter += c;
	}
	//--------------------------------------------------------------------------
	void UString::_base_iterator::_seekRev(size_type c)
	{
		mIter -= c;
	}
	//--------------------------------------------------------------------------
	void UString::_base_iterator::_become(const _base_iterator& i)
	{
		mIter = i.mIter;
		mString = i.mString;
	}
	//--------------------------------------------------------------------------
	bool UString::_base_iterator::_test_begin() const
	{
		return mIter == mString->mData.begin();
	}
	//--------------------------------------------------------------------------
	bool UString::_base_iterator::_test_end() const
	{
		return mIter == mString->mData.end();
	}
	//--------------------------------------------------------------------------
	UString::size_type UString::_base_iterator::_get_index() const
	{
		return mIter - mString->mData.begin();
	}
	//--------------------------------------------------------------------------
	void UString::_base_iterator::_jump_to(size_type index)
	{
		mIter = mString->mData.begin() + index;
	}
	//--------------------------------------------------------------------------
	UString::unicode_char UString::_base_iterator::_getCharacter() const
	{
		size_type current_index = _get_index();
		return mString->getChar(current_index);
	}
	//--------------------------------------------------------------------------
	int UString::_base_iterator::_setCharacter(unicode_char uc)
	{
		size_type current_index = _get_index();
		int change = mString->setChar(current_index, uc);
		_jump_to(current_index);
		return change;
	}
	//--------------------------------------------------------------------------
	void UString::_base_iterator::_moveNext()
	{
		_seekFwd(1); // move 1 code point forward
		if (_test_end())
			return; // exit if we hit the end
		if (_utf16_surrogate_follow(mIter[0]))
		{
			// landing on a follow code point means we might be part of a bigger character
			// so we test for that
			code_point lead_half = 0;
			//NB: we can't possibly be at the beginning here, so no need to test
			lead_half = mIter[-1]; // check the previous code point to see if we're part of a surrogate pair
			if (_utf16_surrogate_lead(lead_half))
			{
				_seekFwd(1); // if so, then advance 1 more code point
			}
		}
	}
	//--------------------------------------------------------------------------
	void UString::_base_iterator::_movePrev()
	{
		_seekRev(1); // move 1 code point backwards
		if (_test_begin())
			return; // exit if we hit the beginning
		if (_utf16_surrogate_follow(mIter[0]))
		{
			// landing on a follow code point means we might be part of a bigger character
			// so we test for that
			code_point lead_half = 0;
			lead_half = mIter[-1]; // check the previous character to see if we're part of a surrogate pair
			if (_utf16_surrogate_lead(lead_half))
			{
				_seekRev(1); // if so, then rewind 1 more code point
			}
		}
	}
	//--------------------------------------------------------------------------
	//--------------------------------------------------------------------------
	//--------------------------------------------------------------------------
	//--------------------------------------------------------------------------
	UString::_fwd_iterator& UString::_fwd_iterator::operator++()
	{
		_seekFwd(1);
		return *this;
	}
	//--------------------------------------------------------------------------
	UString::_fwd_iterator UString::_fwd_iterator::operator++(int)
	{
		_fwd_iterator tmp(*this);
		_seekFwd(1);
		return tmp;
	}
	//--------------------------------------------------------------------------
	UString::_fwd_iterator& UString::_fwd_iterator::operator--()
	{
		_seekRev(1);
		return *this;
	}
	//--------------------------------------------------------------------------
	UString::_fwd_iterator UString::_fwd_iterator::operator--(int)
	{
		_fwd_iterator tmp(*this);
		_seekRev(1);
		return tmp;
	}
	//--------------------------------------------------------------------------
	UString::_fwd_iterator UString::_fwd_iterator::operator+(difference_type n)
	{
		_fwd_iterator tmp(*this);
		if (n < 0)
			tmp._seekRev(-n);
		else
			tmp._seekFwd(n);
		return tmp;
	}
	//--------------------------------------------------------------------------
	UString::_fwd_iterator UString::_fwd_iterator::operator-(difference_type n)
	{
		_fwd_iterator tmp(*this);
		if (n < 0)
			tmp._seekFwd(-n);
		else
			tmp._seekRev(n);
		return tmp;
	}
	//--------------------------------------------------------------------------
	UString::_fwd_iterator& UString::_fwd_iterator::operator+=(difference_type n)
	{
		if (n < 0)
			_seekRev(-n);
		else
			_seekFwd(n);
		return *this;
	}
	//--------------------------------------------------------------------------
	UString::_fwd_iterator& UString::_fwd_iterator::operator-=(difference_type n)
	{
		if (n < 0)
			_seekFwd(-n);
		else
			_seekRev(n);
		return *this;
	}
	//--------------------------------------------------------------------------
	UString::value_type& UString::_fwd_iterator::operator*() const
	{
		return *mIter;
	}
	//--------------------------------------------------------------------------
	UString::value_type& UString::_fwd_iterator::operator[](difference_type n) const
	{
		_fwd_iterator tmp(*this);
		tmp += n;
		return *tmp;
	}
	//--------------------------------------------------------------------------
	UString::_fwd_iterator& UString::_fwd_iterator::moveNext()
	{
		_moveNext();
		return *this;
	}
	//--------------------------------------------------------------------------
	UString::_fwd_iterator& UString::_fwd_iterator::movePrev()
	{
		_movePrev();
		return *this;
	}
	//--------------------------------------------------------------------------
	UString::unicode_char UString::_fwd_iterator::getCharacter() const
	{
		return _getCharacter();
	}
	//--------------------------------------------------------------------------
	int UString::_fwd_iterator::setCharacter(unicode_char uc)
	{
		return _setCharacter(uc);
	}
	//--------------------------------------------------------------------------
	//--------------------------------------------------------------------------
	//--------------------------------------------------------------------------
	//--------------------------------------------------------------------------
	UString::_const_fwd_iterator::_const_fwd_iterator() = default;
	//--------------------------------------------------------------------------
	UString::_const_fwd_iterator::_const_fwd_iterator(const _fwd_iterator& i)
	{
		_become(i);
	}
	//--------------------------------------------------------------------------
	UString::_const_fwd_iterator& UString::_const_fwd_iterator::operator++()
	{
		_seekFwd(1);
		return *this;
	}
	//--------------------------------------------------------------------------
	UString::_const_fwd_iterator UString::_const_fwd_iterator::operator++(int)
	{
		_const_fwd_iterator tmp(*this);
		_seekFwd(1);
		return tmp;
	}
	//--------------------------------------------------------------------------
	UString::_const_fwd_iterator& UString::_const_fwd_iterator::operator--()
	{
		_seekRev(1);
		return *this;
	}
	//--------------------------------------------------------------------------
	UString::_const_fwd_iterator UString::_const_fwd_iterator::operator--(int)
	{
		_const_fwd_iterator tmp(*this);
		_seekRev(1);
		return tmp;
	}
	//--------------------------------------------------------------------------
	UString::_const_fwd_iterator UString::_const_fwd_iterator::operator+(difference_type n)
	{
		_const_fwd_iterator tmp(*this);
		if (n < 0)
			tmp._seekRev(-n);
		else
			tmp._seekFwd(n);
		return tmp;
	}
	//--------------------------------------------------------------------------
	UString::_const_fwd_iterator UString::_const_fwd_iterator::operator-(difference_type n)
	{
		_const_fwd_iterator tmp(*this);
		if (n < 0)
			tmp._seekFwd(-n);
		else
			tmp._seekRev(n);
		return tmp;
	}
	//--------------------------------------------------------------------------
	UString::_const_fwd_iterator& UString::_const_fwd_iterator::operator+=(difference_type n)
	{
		if (n < 0)
			_seekRev(-n);
		else
			_seekFwd(n);
		return *this;
	}
	//--------------------------------------------------------------------------
	UString::_const_fwd_iterator& UString::_const_fwd_iterator::operator-=(difference_type n)
	{
		if (n < 0)
			_seekFwd(-n);
		else
			_seekRev(n);
		return *this;
	}
	//--------------------------------------------------------------------------
	const UString::value_type& UString::_const_fwd_iterator::operator*() const
	{
		return *mIter;
	}
	//--------------------------------------------------------------------------
	const UString::value_type& UString::_const_fwd_iterator::operator[](difference_type n) const
	{
		_const_fwd_iterator tmp(*this);
		tmp += n;
		return *tmp;
	}
	//--------------------------------------------------------------------------
	UString::_const_fwd_iterator& UString::_const_fwd_iterator::moveNext()
	{
		_moveNext();
		return *this;
	}
	//--------------------------------------------------------------------------
	UString::_const_fwd_iterator& UString::_const_fwd_iterator::movePrev()
	{
		_movePrev();
		return *this;
	}
	//--------------------------------------------------------------------------
	UString::unicode_char UString::_const_fwd_iterator::getCharacter() const
	{
		return _getCharacter();
	}
	//--------------------------------------------------------------------------
	//--------------------------------------------------------------------------
	//--------------------------------------------------------------------------
	//--------------------------------------------------------------------------
	UString::_rev_iterator& UString::_rev_iterator::operator++()
	{
		_seekRev(1);
		return *this;
	}
	//--------------------------------------------------------------------------
	UString::_rev_iterator UString::_rev_iterator::operator++(int)
	{
		_rev_iterator tmp(*this);
		_seekRev(1);
		return tmp;
	}
	//--------------------------------------------------------------------------
	UString::_rev_iterator& UString::_rev_iterator::operator--()
	{
		_seekFwd(1);
		return *this;
	}
	//--------------------------------------------------------------------------
	UString::_rev_iterator UString::_rev_iterator::operator--(int)
	{
		_rev_iterator tmp(*this);
		_seekFwd(1);
		return tmp;
	}
	//--------------------------------------------------------------------------
	UString::_rev_iterator UString::_rev_iterator::operator+(difference_type n)
	{
		_rev_iterator tmp(*this);
		if (n < 0)
			tmp._seekFwd(-n);
		else
			tmp._seekRev(n);
		return tmp;
	}
	//--------------------------------------------------------------------------
	UString::_rev_iterator UString::_rev_iterator::operator-(difference_type n)
	{
		_rev_iterator tmp(*this);
		if (n < 0)
			tmp._seekRev(-n);
		else
			tmp._seekFwd(n);
		return tmp;
	}
	//--------------------------------------------------------------------------
	UString::_rev_iterator& UString::_rev_iterator::operator+=(difference_type n)
	{
		if (n < 0)
			_seekFwd(-n);
		else
			_seekRev(n);
		return *this;
	}
	//--------------------------------------------------------------------------
	UString::_rev_iterator& UString::_rev_iterator::operator-=(difference_type n)
	{
		if (n < 0)
			_seekRev(-n);
		else
			_seekFwd(n);
		return *this;
	}
	//--------------------------------------------------------------------------
	UString::value_type& UString::_rev_iterator::operator*() const
	{
		return mIter[-1];
	}
	//--------------------------------------------------------------------------
	UString::value_type& UString::_rev_iterator::operator[](difference_type n) const
	{
		_rev_iterator tmp(*this);
		tmp -= n;
		return *tmp;
	}
	//--------------------------------------------------------------------------
	//--------------------------------------------------------------------------
	//--------------------------------------------------------------------------
	//--------------------------------------------------------------------------
	UString::_const_rev_iterator::_const_rev_iterator() = default;
	UString::_const_rev_iterator::_const_rev_iterator(const _rev_iterator& i)
	{
		_become(i);
	}
	//--------------------------------------------------------------------------
	UString::_const_rev_iterator& UString::_const_rev_iterator::operator++()
	{
		_seekRev(1);
		return *this;
	}
	//--------------------------------------------------------------------------
	UString::_const_rev_iterator UString::_const_rev_iterator::operator++(int)
	{
		_const_rev_iterator tmp(*this);
		_seekRev(1);
		return tmp;
	}
	//--------------------------------------------------------------------------
	UString::_const_rev_iterator& UString::_const_rev_iterator::operator--()
	{
		_seekFwd(1);
		return *this;
	}
	//--------------------------------------------------------------------------
	UString::_const_rev_iterator UString::_const_rev_iterator::operator--(int)
	{
		_const_rev_iterator tmp(*this);
		_seekFwd(1);
		return tmp;
	}
	//--------------------------------------------------------------------------
	UString::_const_rev_iterator UString::_const_rev_iterator::operator+(difference_type n)
	{
		_const_rev_iterator tmp(*this);
		if (n < 0)
			tmp._seekFwd(-n);
		else
			tmp._seekRev(n);
		return tmp;
	}
	//--------------------------------------------------------------------------
	UString::_const_rev_iterator UString::_const_rev_iterator::operator-(difference_type n)
	{
		_const_rev_iterator tmp(*this);
		if (n < 0)
			tmp._seekRev(-n);
		else
			tmp._seekFwd(n);
		return tmp;
	}
	//--------------------------------------------------------------------------
	UString::_const_rev_iterator& UString::_const_rev_iterator::operator+=(difference_type n)
	{
		if (n < 0)
			_seekFwd(-n);
		else
			_seekRev(n);
		return *this;
	}
	//--------------------------------------------------------------------------
	UString::_const_rev_iterator& UString::_const_rev_iterator::operator-=(difference_type n)
	{
		if (n < 0)
			_seekRev(-n);
		else
			_seekFwd(n);
		return *this;
	}
	//--------------------------------------------------------------------------
	const UString::value_type& UString::_const_rev_iterator::operator*() const
	{
		return mIter[-1];
	}
	//--------------------------------------------------------------------------
	const UString::value_type& UString::_const_rev_iterator::operator[](difference_type n) const
	{
		_const_rev_iterator tmp(*this);
		tmp -= n;
		return *tmp;
	}
	//--------------------------------------------------------------------------
	//--------------------------------------------------------------------------
	//--------------------------------------------------------------------------
	//--------------------------------------------------------------------------
	UString::UString()
	{
		_init();
	}
	//--------------------------------------------------------------------------
	UString::UString(const UString& copy)
	{
		_init();
		mData = copy.mData;
	}
	//--------------------------------------------------------------------------
	UString::UString(size_type length, const code_point& ch)
	{
		_init();
		assign(length, ch);
	}
	//--------------------------------------------------------------------------
	UString::UString(const code_point* str)
	{
		_init();
		assign(str);
	}
	//--------------------------------------------------------------------------
	UString::UString(const code_point* str, size_type length)
	{
		_init();
		assign(str, length);
	}
	//--------------------------------------------------------------------------
	UString::UString(const UString& str, size_type index, size_type length)
	{
		_init();
		assign(str, index, length);
	}
	//--------------------------------------------------------------------------
#if MYGUI_IS_NATIVE_WCHAR_T
	UString::UString(const wchar_t* w_str)
	{
		_init();
		assign(w_str);
	}
	//--------------------------------------------------------------------------
	UString::UString(const wchar_t* w_str, size_type length)
	{
		_init();
		assign(w_str, length);
	}
#endif
	//--------------------------------------------------------------------------
	UString::UString(const std::wstring& wstr)
	{
		_init();
		assign(wstr);
	}
	//--------------------------------------------------------------------------
	UString::UString(const char* c_str)
	{
		_init();
		assign(c_str, std::strlen(c_str));
	}
	//--------------------------------------------------------------------------
	UString::UString(const char* c_str, size_type length)
	{
		_init();
		assign(c_str, length);
	}
	//--------------------------------------------------------------------------
	UString::UString(const std::string& str)
	{
		_init();
		assign(str.data(), str.size());
	}
	//--------------------------------------------------------------------------
	UString::UString(const utf32string& str)
	{
		_init();
		assign(str);
	}
	//--------------------------------------------------------------------------
	UString::~UString()
	{
		_cleanBuffer();
	}
	//--------------------------------------------------------------------------
	UString::size_type UString::size() const
	{
		return mData.size();
	}
	//--------------------------------------------------------------------------
	UString::size_type UString::length() const
	{
		return size();
	}
	//--------------------------------------------------------------------------
	UString::size_type UString::length_Characters() const
	{
		const_iterator i = begin();
		const_iterator ie = end();
		size_type c = 0;
		while (i != ie)
		{
			i.moveNext();
			++c;
		}
		return c;
	}
	//--------------------------------------------------------------------------
	UString::size_type UString::max_size() const
	{
		return mData.max_size();
	}
	//--------------------------------------------------------------------------
	void UString::reserve(size_type size)
	{
		mData.reserve(size);
	}
	//--------------------------------------------------------------------------
	void UString::resize(size_type num, const code_point& val /*= 0 */)
	{
		mData.resize(num, val);
	}
	//--------------------------------------------------------------------------
	void UString::swap(UString& from)
	{
		mData.swap(from.mData);
	}
	//--------------------------------------------------------------------------
	bool UString::empty() const
	{
		return mData.empty();
	}
	//--------------------------------------------------------------------------
	const UString::code_point* UString::c_str() const
	{
		return mData.c_str();
	}
	//--------------------------------------------------------------------------
	const UString::code_point* UString::data() const
	{
		return c_str();
	}
	//--------------------------------------------------------------------------
	UString::size_type UString::capacity() const
	{
		return mData.capacity();
	}
	//--------------------------------------------------------------------------
	void UString::clear()
	{
		mData.clear();
	}
	//--------------------------------------------------------------------------
	UString UString::substr(size_type index, size_type num /*= npos */) const
	{
		// this could avoid the extra copy if we used a private specialty constructor
		dstring data = mData.substr(index, num);
		UString tmp;
		tmp.mData.swap(data);
		return tmp;
	}
	//--------------------------------------------------------------------------
	void UString::push_back(unicode_char val)
	{
		code_point cp[2];
		size_t c = _utf32_to_utf16(val, cp);
		if (c > 0)
			push_back(cp[0]);
		if (c > 1)
			push_back(cp[1]);
	}
	//--------------------------------------------------------------------------
#if MYGUI_IS_NATIVE_WCHAR_T
	void UString::push_back(wchar_t val)
	{
		// we do this because the Unicode method still preserves UTF-16 code points
		mData.push_back(static_cast<code_point>(val));
	}
#endif
	//--------------------------------------------------------------------------
	void UString::push_back(code_point val)
	{
		mData.push_back(val);
	}

	void UString::push_back(char val)
	{
		mData.push_back(static_cast<code_point>(val));
	}

	bool UString::inString(unicode_char ch) const
	{
		const_iterator i;
		const_iterator ie = end();
		for (i = begin(); i != ie; i.moveNext())
		{
			if (i.getCharacter() == ch)
				return true;
		}
		return false;
	}

	const std::string& UString::asUTF8() const
	{
		_load_buffer_UTF8();
		return *m_buffer.mStrBuffer;
	}

	const char* UString::asUTF8_c_str() const
	{
		_load_buffer_UTF8();
		return m_buffer.mStrBuffer->c_str();
	}

	const UString::utf32string& UString::asUTF32() const
	{
		_load_buffer_UTF32();
		return *m_buffer.mUTF32StrBuffer;
	}

	const UString::unicode_char* UString::asUTF32_c_str() const
	{
		_load_buffer_UTF32();
		return m_buffer.mUTF32StrBuffer->c_str();
	}

	const std::wstring& UString::asWStr() const
	{
		_load_buffer_WStr();
		return *m_buffer.mWStrBuffer;
	}

	const wchar_t* UString::asWStr_c_str() const
	{
		_load_buffer_WStr();
		return m_buffer.mWStrBuffer->c_str();
	}

	UString::code_point& UString::at(size_type loc)
	{
		return mData.at(loc);
	}

	const UString::code_point& UString::at(size_type loc) const
	{
		return mData.at(loc);
	}

	UString::unicode_char UString::getChar(size_type loc) const
	{
		const code_point* ptr = c_str();
		unicode_char uc;
		size_t l = _utf16_char_length(ptr[loc]);
		code_point cp[2] = {/* blame the code beautifier */
							0,
							0};
		cp[0] = ptr[loc];

		if (l == 2 && (loc + 1) < mData.length())
		{
			cp[1] = ptr[loc + 1];
		}
		_utf16_to_utf32(cp, uc);
		return uc;
	}

	int UString::setChar(size_type loc, unicode_char ch)
	{
		code_point cp[2] = {/* blame the code beautifier */
							0,
							0};
		size_t l = _utf32_to_utf16(ch, cp);
		unicode_char existingChar = getChar(loc);
		size_t existingSize = _utf16_char_length(existingChar);
		size_t newSize = _utf16_char_length(ch);

		if (newSize > existingSize)
		{
			at(loc) = cp[0];
			insert(loc + 1, 1, cp[1]);
			return 1;
		}
		if (newSize < existingSize)
		{
			erase(loc, 1);
			at(loc) = cp[0];
			return -1;
		}

		// newSize == existingSize
		at(loc) = cp[0];
		if (l == 2)
			at(loc + 1) = cp[1];
		return 0;
	}

	UString::iterator UString::begin()
	{
		iterator i;
		i.mIter = mData.begin();
		i.mString = this;
		return i;
	}

	UString::const_iterator UString::begin() const
	{
		const_iterator i;
		i.mIter = const_cast<UString*>(this)->mData.begin();
		i.mString = const_cast<UString*>(this);
		return i;
	}

	UString::iterator UString::end()
	{
		iterator i;
		i.mIter = mData.end();
		i.mString = this;
		return i;
	}

	UString::const_iterator UString::end() const
	{
		const_iterator i;
		i.mIter = const_cast<UString*>(this)->mData.end();
		i.mString = const_cast<UString*>(this);
		return i;
	}

	UString::reverse_iterator UString::rbegin()
	{
		reverse_iterator i;
		i.mIter = mData.end();
		i.mString = this;
		return i;
	}

	UString::const_reverse_iterator UString::rbegin() const
	{
		const_reverse_iterator i;
		i.mIter = const_cast<UString*>(this)->mData.end();
		i.mString = const_cast<UString*>(this);
		return i;
	}

	UString::reverse_iterator UString::rend()
	{
		reverse_iterator i;
		i.mIter = mData.begin();
		i.mString = this;
		return i;
	}

	UString::const_reverse_iterator UString::rend() const
	{
		const_reverse_iterator i;
		i.mIter = const_cast<UString*>(this)->mData.begin();
		i.mString = const_cast<UString*>(this);
		return i;
	}

	UString& UString::assign(iterator start, iterator end)
	{
		mData.assign(start.mIter, end.mIter);
		return *this;
	}

	UString& UString::assign(const UString& str)
	{
		mData.assign(str.mData);
		return *this;
	}

	UString& UString::assign(const code_point* str)
	{
		mData.assign(str);
		return *this;
	}

	UString& UString::assign(const code_point* str, size_type num)
	{
		mData.assign(str, num);
		return *this;
	}

	UString& UString::assign(const UString& str, size_type index, size_type len)
	{
		mData.assign(str.mData, index, len);
		return *this;
	}

	UString& UString::assign(size_type num, const code_point& ch)
	{
		mData.assign(num, ch);
		return *this;
	}

	UString& UString::assign(const std::wstring& wstr)
	{
		mData.clear();
		mData.reserve(wstr.length()); // best guess bulk allocate
#ifdef WCHAR_UTF16 // if we're already working in UTF-16, this is easy
		code_point tmp;
		std::wstring::const_iterator i, ie = wstr.end();
		for (i = wstr.begin(); i != ie; i++)
		{
			tmp = static_cast<code_point>(*i);
			mData.push_back(tmp);
		}
#else // otherwise we do it the safe way (which is still 100% safe to pass UTF-16 through, just slower)
		code_point cp[3] = {0, 0, 0};
		unicode_char tmp;
		std::wstring::const_iterator i;
		std::wstring::const_iterator ie = wstr.end();
		for (i = wstr.begin(); i != ie; i++)
		{
			tmp = static_cast<unicode_char>(*i);
			size_t l = _utf32_to_utf16(tmp, cp);
			if (l > 0)
				mData.push_back(cp[0]);
			if (l > 1)
				mData.push_back(cp[1]);
		}
#endif
		return *this;
	}

#if MYGUI_IS_NATIVE_WCHAR_T
	UString& UString::assign(const wchar_t* w_str)
	{
		std::wstring tmp;
		tmp.assign(w_str);
		return assign(tmp);
	}

	UString& UString::assign(const wchar_t* w_str, size_type num)
	{
		std::wstring tmp;
		tmp.assign(w_str, num);
		return assign(tmp);
	}
#endif

	UString& UString::assign(const utf32string& str)
	{
		for (const auto& character : str)
		{
			push_back(character);
		}
		return *this;
	}

	UString& UString::assign(const char* c_str, size_type num)
	{
		size_type len = _verifyUTF8(c_str, num);
		clear(); // empty our contents, if there are any
		reserve(len); // best guess bulk capacity growth

		// This is a 3 step process, converting each byte in the UTF-8 stream to UTF-32,
		// then converting it to UTF-16, then finally appending the data buffer

		unicode_char uc; // temporary Unicode character buffer
		unsigned char utf8buf[7]; // temporary UTF-8 buffer
		utf8buf[6] = 0;
		size_t utf8len; // UTF-8 length
		code_point utf16buff[3]; // temporary UTF-16 buffer
		utf16buff[2] = 0;
		size_t utf16len; // UTF-16 length

		for (size_type i = 0; i < num; ++i)
		{
			utf8len =
				std::min(_utf8_char_length(static_cast<unsigned char>(c_str[i])), num - i); // estimate bytes to load
			for (size_t j = 0; j < utf8len; j++)
			{ // load the needed UTF-8 bytes
				utf8buf[j] = (static_cast<unsigned char>(
					c_str
						[i +
						 j])); // we don't increment 'i' here just in case the estimate is wrong (shouldn't happen, but we're being careful)
			}
			utf8buf[utf8len] = 0; // nul terminate so we throw an exception before running off the end of the buffer
			utf8len = _utf8_to_utf32(utf8buf, uc); // do the UTF-8 -> UTF-32 conversion
			i += utf8len - 1; // we subtract 1 for the increment of the 'for' loop

			utf16len = _utf32_to_utf16(uc, utf16buff); // UTF-32 -> UTF-16 conversion
			append(utf16buff, utf16len); // append the characters to the string
		}
		return *this;
	}

	UString& UString::append(const UString& str)
	{
		mData.append(str.mData);
		return *this;
	}

	UString& UString::append(const code_point* str)
	{
		mData.append(str);
		return *this;
	}

	UString& UString::append(const UString& str, size_type index, size_type len)
	{
		mData.append(str.mData, index, len);
		return *this;
	}

	UString& UString::append(const code_point* str, size_type num)
	{
		mData.append(str, num);
		return *this;
	}

	UString& UString::append(size_type num, code_point ch)
	{
		mData.append(num, ch);
		return *this;
	}

	UString& UString::append(iterator start, iterator end)
	{
		mData.append(start.mIter, end.mIter);
		return *this;
	}

#if MYGUI_IS_NATIVE_WCHAR_T
	UString& UString::append(const wchar_t* w_str, size_type num)
	{
		std::wstring tmp(w_str, num);
		return append(tmp);
	}

	UString& UString::append(size_type num, wchar_t ch)
	{
		return append(num, static_cast<unicode_char>(ch));
	}
#endif
	UString& UString::append(const char* c_str, size_type num)
	{
		UString tmp(c_str, num);
		append(tmp);
		return *this;
	}

	UString& UString::append(size_type num, char ch)
	{
		append(num, static_cast<code_point>(ch));
		return *this;
	}

	UString& UString::append(size_type num, unicode_char ch)
	{
		code_point cp[2] = {0, 0};
		if (_utf32_to_utf16(ch, cp) == 2)
		{
			for (size_type i = 0; i < num; i++)
			{
				append(1, cp[0]);
				append(1, cp[1]);
			}
		}
		else
		{
			for (size_type i = 0; i < num; i++)
			{
				append(1, cp[0]);
			}
		}
		return *this;
	}

	UString::iterator UString::insert(iterator i, const code_point& ch)
	{
		iterator ret;
		ret.mIter = mData.insert(i.mIter, ch);
		ret.mString = this;
		return ret;
	}

	UString& UString::insert(size_type index, const UString& str)
	{
		mData.insert(index, str.mData);
		return *this;
	}

	UString& UString::insert(size_type index1, const UString& str, size_type index2, size_type num)
	{
		mData.insert(index1, str.mData, index2, num);
		return *this;
	}

	void UString::insert(iterator i, iterator start, iterator end)
	{
		mData.insert(i.mIter, start.mIter, end.mIter);
	}

	UString& UString::insert(size_type index, const code_point* str, size_type num)
	{
		mData.insert(index, str, num);
		return *this;
	}

#if MYGUI_IS_NATIVE_WCHAR_T
	UString& UString::insert(size_type index, const wchar_t* w_str, size_type num)
	{
		UString tmp(w_str, num);
		insert(index, tmp);
		return *this;
	}
#endif

	UString& UString::insert(size_type index, const char* c_str, size_type num)
	{
		UString tmp(c_str, num);
		insert(index, tmp);
		return *this;
	}

	UString& UString::insert(size_type index, size_type num, code_point ch)
	{
		mData.insert(index, num, ch);
		return *this;
	}

#if MYGUI_IS_NATIVE_WCHAR_T
	UString& UString::insert(size_type index, size_type num, wchar_t ch)
	{
		insert(index, num, static_cast<unicode_char>(ch));
		return *this;
	}
#endif

	UString& UString::insert(size_type index, size_type num, char ch)
	{
		insert(index, num, static_cast<code_point>(ch));
		return *this;
	}

	UString& UString::insert(size_type index, size_type num, unicode_char ch)
	{
		code_point cp[3] = {0, 0, 0};
		size_t l = _utf32_to_utf16(ch, cp);
		if (l == 1)
		{
			return insert(index, num, cp[0]);
		}
		for (size_type c = 0; c < num; c++)
		{
			// insert in reverse order to preserve ordering after insert
			insert(index, 1, cp[1]);
			insert(index, 1, cp[0]);
		}
		return *this;
	}

	void UString::insert(iterator i, size_type num, const code_point& ch)
	{
		mData.insert(i.mIter, num, ch);
	}
#if MYGUI_IS_NATIVE_WCHAR_T
	void UString::insert(iterator i, size_type num, const wchar_t& ch)
	{
		insert(i, num, static_cast<unicode_char>(ch));
	}
#endif

	void UString::insert(iterator i, size_type num, const char& ch)
	{
		insert(i, num, static_cast<code_point>(ch));
	}

	void UString::insert(iterator i, size_type num, const unicode_char& ch)
	{
		code_point cp[3] = {0, 0, 0};
		size_t l = _utf32_to_utf16(ch, cp);
		if (l == 1)
		{
			insert(i, num, cp[0]);
		}
		else
		{
			for (size_type c = 0; c < num; c++)
			{
				// insert in reverse order to preserve ordering after insert
				insert(i, 1, cp[1]);
				insert(i, 1, cp[0]);
			}
		}
	}

	UString::iterator UString::erase(iterator loc)
	{
		iterator ret;
		ret.mIter = mData.erase(loc.mIter);
		ret.mString = this;
		return ret;
	}

	UString::iterator UString::erase(iterator start, iterator end)
	{
		iterator ret;
		ret.mIter = mData.erase(start.mIter, end.mIter);
		ret.mString = this;
		return ret;
	}

	UString& UString::erase(size_type index /*= 0*/, size_type num /*= npos */)
	{
		if (num == npos)
			mData.erase(index);
		else
			mData.erase(index, num);
		return *this;
	}

	UString& UString::replace(size_type index1, size_type num1, const UString& str)
	{
		mData.replace(index1, num1, str.mData, 0, npos);
		return *this;
	}

	UString& UString::replace(size_type index1, size_type num1, const UString& str, size_type num2)
	{
		mData.replace(index1, num1, str.mData, 0, num2);
		return *this;
	}

	UString& UString::replace(size_type index1, size_type num1, const UString& str, size_type index2, size_type num2)
	{
		mData.replace(index1, num1, str.mData, index2, num2);
		return *this;
	}

	UString& UString::replace(iterator start, iterator end, const UString& str, size_type num /*= npos */)
	{
		_const_fwd_iterator st(start); //Work around for gcc, allow it to find correct overload

		size_type index1 = begin() - st;
		size_type num1 = end - st;
		return replace(index1, num1, str, 0, num);
	}

	UString& UString::replace(size_type index, size_type num1, size_type num2, code_point ch)
	{
		mData.replace(index, num1, num2, ch);
		return *this;
	}

	UString& UString::replace(iterator start, iterator end, size_type num, code_point ch)
	{
		_const_fwd_iterator st(start); //Work around for gcc, allow it to find correct overload

		size_type index1 = begin() - st;
		size_type num1 = end - st;
		return replace(index1, num1, num, ch);
	}

	int UString::compare(const UString& str) const
	{
		return mData.compare(str.mData);
	}

	int UString::compare(const code_point* str) const
	{
		return mData.compare(str);
	}

	int UString::compare(size_type index, size_type length, const UString& str) const
	{
		return mData.compare(index, length, str.mData);
	}

	int UString::compare(size_type index, size_type length, const UString& str, size_type index2, size_type length2)
		const
	{
		return mData.compare(index, length, str.mData, index2, length2);
	}

	int UString::compare(size_type index, size_type length, const code_point* str, size_type length2) const
	{
		return mData.compare(index, length, str, length2);
	}

#if MYGUI_IS_NATIVE_WCHAR_T
	int UString::compare(size_type index, size_type length, const wchar_t* w_str, size_type length2) const
	{
		UString tmp(w_str, length2);
		return compare(index, length, tmp);
	}
#endif

	int UString::compare(size_type index, size_type length, const char* c_str, size_type length2) const
	{
		UString tmp(c_str, length2);
		return compare(index, length, tmp);
	}

	UString::size_type UString::find(const UString& str, size_type index /*= 0 */) const
	{
		return mData.find(str.c_str(), index);
	}

	UString::size_type UString::find(const code_point* cp_str, size_type index, size_type length) const
	{
		UString tmp(cp_str);
		return mData.find(tmp.c_str(), index, length);
	}

	UString::size_type UString::find(const char* c_str, size_type index, size_type length) const
	{
		UString tmp(c_str);
		return mData.find(tmp.c_str(), index, length);
	}

#if MYGUI_IS_NATIVE_WCHAR_T
	UString::size_type UString::find(const wchar_t* w_str, size_type index, size_type length) const
	{
		UString tmp(w_str);
		return mData.find(tmp.c_str(), index, length);
	}
#endif

	UString::size_type UString::find(char ch, size_type index /*= 0 */) const
	{
		return find(static_cast<code_point>(ch), index);
	}

	UString::size_type UString::find(code_point ch, size_type index /*= 0 */) const
	{
		return mData.find(ch, index);
	}

#if MYGUI_IS_NATIVE_WCHAR_T
	UString::size_type UString::find(wchar_t ch, size_type index /*= 0 */) const
	{
		return find(static_cast<unicode_char>(ch), index);
	}
#endif

	UString::size_type UString::find(unicode_char ch, size_type index /*= 0 */) const
	{
		code_point cp[3] = {0, 0, 0};
		size_t l = _utf32_to_utf16(ch, cp);
		return find(UString(cp, l), index);
	}

	UString::size_type UString::rfind(const UString& str, size_type index /*= 0 */) const
	{
		return mData.rfind(str.c_str(), index);
	}

	UString::size_type UString::rfind(const code_point* cp_str, size_type index, size_type num) const
	{
		UString tmp(cp_str);
		return mData.rfind(tmp.c_str(), index, num);
	}

	UString::size_type UString::rfind(const char* c_str, size_type index, size_type num) const
	{
		UString tmp(c_str);
		return mData.rfind(tmp.c_str(), index, num);
	}

#if MYGUI_IS_NATIVE_WCHAR_T
	UString::size_type UString::rfind(const wchar_t* w_str, size_type index, size_type num) const
	{
		UString tmp(w_str);
		return mData.rfind(tmp.c_str(), index, num);
	}
#endif

	UString::size_type UString::rfind(char ch, size_type index /*= 0 */) const
	{
		return rfind(static_cast<code_point>(ch), index);
	}

	UString::size_type UString::rfind(code_point ch, size_type index) const
	{
		return mData.rfind(ch, index);
	}

#if MYGUI_IS_NATIVE_WCHAR_T
	UString::size_type UString::rfind(wchar_t ch, size_type index /*= 0 */) const
	{
		return rfind(static_cast<unicode_char>(ch), index);
	}
#endif

	UString::size_type UString::rfind(unicode_char ch, size_type index /*= 0 */) const
	{
		code_point cp[3] = {0, 0, 0};
		size_t l = _utf32_to_utf16(ch, cp);
		return rfind(UString(cp, l), index);
	}

	UString::size_type UString::find_first_of(const UString& str, size_type index /*= 0*/, size_type num /*= npos */)
		const
	{
		size_type i = 0;
		const size_type len = length();
		while (i < num && (index + i) < len)
		{
			unicode_char ch = getChar(index + i);
			if (str.inString(ch))
				return index + i;
			i += _utf16_char_length(ch); // increment by the Unicode character length
		}
		return npos;
	}

	UString::size_type UString::find_first_of(code_point ch, size_type index /*= 0 */) const
	{
		UString tmp;
		tmp.assign(1, ch);
		return find_first_of(tmp, index);
	}

	UString::size_type UString::find_first_of(char ch, size_type index /*= 0 */) const
	{
		return find_first_of(static_cast<code_point>(ch), index);
	}

#if MYGUI_IS_NATIVE_WCHAR_T
	UString::size_type UString::find_first_of(wchar_t ch, size_type index /*= 0 */) const
	{
		return find_first_of(static_cast<unicode_char>(ch), index);
	}
#endif

	UString::size_type UString::find_first_of(unicode_char ch, size_type index /*= 0 */) const
	{
		code_point cp[3] = {0, 0, 0};
		size_t l = _utf32_to_utf16(ch, cp);
		return find_first_of(UString(cp, l), index);
	}

	UString::size_type UString::find_first_not_of(
		const UString& str,
		size_type index /*= 0*/,
		size_type num /*= npos */) const
	{
		size_type i = 0;
		const size_type len = length();
		while (i < num && (index + i) < len)
		{
			unicode_char ch = getChar(index + i);
			if (!str.inString(ch))
				return index + i;
			i += _utf16_char_length(ch); // increment by the Unicode character length
		}
		return npos;
	}

	UString::size_type UString::find_first_not_of(code_point ch, size_type index /*= 0 */) const
	{
		UString tmp;
		tmp.assign(1, ch);
		return find_first_not_of(tmp, index);
	}

	UString::size_type UString::find_first_not_of(char ch, size_type index /*= 0 */) const
	{
		return find_first_not_of(static_cast<code_point>(ch), index);
	}

#if MYGUI_IS_NATIVE_WCHAR_T
	UString::size_type UString::find_first_not_of(wchar_t ch, size_type index /*= 0 */) const
	{
		return find_first_not_of(static_cast<unicode_char>(ch), index);
	}
#endif

	UString::size_type UString::find_first_not_of(unicode_char ch, size_type index /*= 0 */) const
	{
		code_point cp[3] = {0, 0, 0};
		size_t l = _utf32_to_utf16(ch, cp);
		return find_first_not_of(UString(cp, l), index);
	}

	UString::size_type UString::find_last_of(const UString& str, size_type index /*= npos*/, size_type num /*= npos */)
		const
	{
		size_type i = 0;
		const size_type len = length();
		if (index > len)
			index = len - 1;

		while (i < num && (index - i) != npos)
		{
			size_type j = index - i;
			// careful to step full Unicode characters
			if (j != 0 && _utf16_surrogate_follow(at(j)) && _utf16_surrogate_lead(at(j - 1)))
			{
				j = index - ++i;
			}
			// and back to the usual dull test
			unicode_char ch = getChar(j);
			if (str.inString(ch))
				return j;
			i++;
		}
		return npos;
	}

	UString::size_type UString::find_last_of(code_point ch, size_type index /*= npos */) const
	{
		UString tmp;
		tmp.assign(1, ch);
		return find_last_of(tmp, index);
	}

#if MYGUI_IS_NATIVE_WCHAR_T
	UString::size_type UString::find_last_of(wchar_t ch, size_type index /*= npos */) const
	{
		return find_last_of(static_cast<unicode_char>(ch), index);
	}
#endif

	UString::size_type UString::find_last_of(unicode_char ch, size_type index /*= npos */) const
	{
		code_point cp[3] = {0, 0, 0};
		size_t l = _utf32_to_utf16(ch, cp);
		return find_last_of(UString(cp, l), index);
	}

	UString::size_type UString::find_last_not_of(
		const UString& str,
		size_type index /*= npos*/,
		size_type num /*= npos */) const
	{
		size_type i = 0;
		const size_type len = length();
		if (index > len)
			index = len - 1;

		while (i < num && (index - i) != npos)
		{
			size_type j = index - i;
			// careful to step full Unicode characters
			if (j != 0 && _utf16_surrogate_follow(at(j)) && _utf16_surrogate_lead(at(j - 1)))
			{
				j = index - ++i;
			}
			// and back to the usual dull test
			unicode_char ch = getChar(j);
			if (!str.inString(ch))
				return j;
			i++;
		}
		return npos;
	}

	UString::size_type UString::find_last_not_of(code_point ch, size_type index /*= npos */) const
	{
		UString tmp;
		tmp.assign(1, ch);
		return find_last_not_of(tmp, index);
	}

	UString::size_type UString::find_last_not_of(char ch, size_type index /*= npos */) const
	{
		return find_last_not_of(static_cast<code_point>(ch), index);
	}

#if MYGUI_IS_NATIVE_WCHAR_T
	UString::size_type UString::find_last_not_of(wchar_t ch, size_type index /*= npos */) const
	{
		return find_last_not_of(static_cast<unicode_char>(ch), index);
	}
#endif

	UString::size_type UString::find_last_not_of(unicode_char ch, size_type index /*= npos */) const
	{
		code_point cp[3] = {0, 0, 0};
		size_t l = _utf32_to_utf16(ch, cp);
		return find_last_not_of(UString(cp, l), index);
	}

	bool UString::operator<(const UString& right) const
	{
		return compare(right) < 0;
	}

	bool UString::operator<=(const UString& right) const
	{
		return compare(right) <= 0;
	}

	UString& UString::operator=(const UString& s)
	{
		return assign(s);
	}

	UString& UString::operator=(code_point ch)
	{
		clear();
		return append(1, ch);
	}

	UString& UString::operator=(char ch)
	{
		clear();
		return append(1, ch);
	}

#if MYGUI_IS_NATIVE_WCHAR_T
	UString& UString::operator=(wchar_t ch)
	{
		clear();
		return append(1, ch);
	}
#endif

	UString& UString::operator=(unicode_char ch)
	{
		clear();
		return append(1, ch);
	}

	bool UString::operator>(const UString& right) const
	{
		return compare(right) > 0;
	}

	bool UString::operator>=(const UString& right) const
	{
		return compare(right) >= 0;
	}

	bool UString::operator==(const UString& right) const
	{
		return compare(right) == 0;
	}

	bool UString::operator!=(const UString& right) const
	{
		return !operator==(right);
	}

	UString::code_point& UString::operator[](size_type index)
	{
		return at(index);
	}

	const UString::code_point& UString::operator[](size_type index) const
	{
		return at(index);
	}

	UString::operator std::string() const
	{
		return asUTF8();
	}

	//! implicit cast to std::wstring
	UString::operator std::wstring() const
	{
		return asWStr();
	}


	bool UString::_utf16_independent_char(code_point cp)
	{
		// tests if the cp is within the surrogate pair range
		// everything else is a standalone code point, ot it matches a surrogate pair signature
		return 0xD800 > cp || cp > 0xDFFF;
	}

	bool UString::_utf16_surrogate_lead(code_point cp)
	{
		// tests if the cp is within the 2nd word of a surrogate pair
		// it is a 1st word, or it isn't
		return 0xD800 <= cp && cp <= 0xDBFF;
	}

	bool UString::_utf16_surrogate_follow(code_point cp)
	{
		// tests if the cp is within the 2nd word of a surrogate pair
		// it is a 2nd word, everything else isn't
		return 0xDC00 <= cp && cp <= 0xDFFF;
	}

	size_t UString::_utf16_char_length(code_point cp)
	{
		if (0xD800 <= cp && cp <= 0xDBFF) // test if cp is the beginning of a surrogate pair
			return 2; // if it is, then we are 2 words long
		return 1; // otherwise we are only 1 word long
	}

	size_t UString::_utf16_char_length(unicode_char uc)
	{
		if (uc > 0xFFFF) // test if uc is greater than the single word maximum
			return 2; // if so, we need a surrogate pair
		return 1; // otherwise we can stuff it into a single word
	}

	size_t UString::_utf16_to_utf32(const code_point in_cp[2], unicode_char& out_uc)
	{
		const code_point& cp1 = in_cp[0];
		const code_point& cp2 = in_cp[1];
		bool wordPair = false;

		// does it look like a surrogate pair?
		if (0xD800 <= cp1 && cp1 <= 0xDBFF)
		{
			// looks like one, but does the other half match the algorithm as well?
			if (0xDC00 <= cp2 && cp2 <= 0xDFFF)
				wordPair = true; // yep!
		}

		if (!wordPair)
		{ // if we aren't a 100% authentic surrogate pair, then just copy the value
			out_uc = cp1;
			return 1;
		}

		unsigned short cU = cp1;
		unsigned short cL = cp2; // copy upper and lower words of surrogate pair to writable buffers
		cU -= 0xD800; // remove the encoding markers
		cL -= 0xDC00;

		out_uc = (cU & 0x03FF) << 10; // grab the 10 upper bits and set them in their proper location
		out_uc |= (cL & 0x03FF); // combine in the lower 10 bits
		out_uc += 0x10000; // add back in the value offset

		return 2; // this whole operation takes to words, so that's what we'll return
	}

	size_t UString::_utf32_to_utf16(const unicode_char& in_uc, code_point out_cp[2])
	{
		if (in_uc <= 0xFFFF)
		{ // we blindly preserve sentinel values because our decoder understands them
			out_cp[0] = static_cast<code_point>(in_uc);
			return 1;
		}
		unicode_char uc = in_uc; // copy to writable buffer
		unsigned short tmp; // single code point buffer
		uc -= 0x10000; // subtract value offset

		//process upper word
		tmp = static_cast<unsigned short>((uc >> 10) & 0x03FF); // grab the upper 10 bits
		tmp += 0xD800; // add encoding offset
		out_cp[0] = tmp; // write

		// process lower word
		tmp = static_cast<unsigned short>(uc & 0x03FF); // grab the lower 10 bits
		tmp += 0xDC00; // add encoding offset
		out_cp[1] = tmp; // write

		return 2; // return used word count (2 for surrogate pairs)
	}

	bool UString::_utf8_start_char(unsigned char cp)
	{
		return (cp & ~_cont_mask) != _cont;
	}

	size_t UString::_utf8_char_length(unsigned char cp)
	{
		if (!(cp & 0x80))
			return 1;
		if ((cp & ~_lead1_mask) == _lead1)
			return 2;
		if ((cp & ~_lead2_mask) == _lead2)
			return 3;
		if ((cp & ~_lead3_mask) == _lead3)
			return 4;
		if ((cp & ~_lead4_mask) == _lead4)
			return 5;
		if ((cp & ~_lead5_mask) == _lead5)
			return 6;

		return 1;
		//throw invalid_data( "invalid UTF-8 sequence header value" );
	}

	size_t UString::_utf8_char_length(unicode_char uc)
	{
		/*
		7 bit:  U-00000000 - U-0000007F: 0xxxxxxx
		11 bit: U-00000080 - U-000007FF: 110xxxxx 10xxxxxx
		16 bit: U-00000800 - U-0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
		21 bit: U-00010000 - U-001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
		26 bit: U-00200000 - U-03FFFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
		31 bit: U-04000000 - U-7FFFFFFF: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
		*/
		if (!(uc & ~0x0000007F))
			return 1;
		if (!(uc & ~0x000007FF))
			return 2;
		if (!(uc & ~0x0000FFFF))
			return 3;
		if (!(uc & ~0x001FFFFF))
			return 4;
		if (!(uc & ~0x03FFFFFF))
			return 5;
		if (!(uc & ~0x7FFFFFFF))
			return 6;

		return 1;
		//throw invalid_data( "invalid UTF-32 value" );
	}

	size_t UString::_utf8_to_utf32(const unsigned char in_cp[6], unicode_char& out_uc)
	{
		size_t len = _utf8_char_length(in_cp[0]);
		if (len == 1)
		{ // if we are only 1 byte long, then just grab it and exit
			out_uc = in_cp[0];
			return 1;
		}

		unicode_char c = 0; // temporary buffer
		size_t i = 0;
		switch (len)
		{ // load header byte
		case 6: c = in_cp[i] & _lead5_mask; break;
		case 5: c = in_cp[i] & _lead4_mask; break;
		case 4: c = in_cp[i] & _lead3_mask; break;
		case 3: c = in_cp[i] & _lead2_mask; break;
		case 2: c = in_cp[i] & _lead1_mask; break;
		default: break;
		}

		// load each continuation byte
		for (++i; i < len; i++)
		{
			if ((in_cp[i] & ~_cont_mask) != _cont)
			{
				//throw invalid_data( "bad UTF-8 continuation byte" );
				out_uc = in_cp[0];
				return 1;
			}
			c <<= 6;
			c |= (in_cp[i] & _cont_mask);
		}

		out_uc = c; // write the final value and return the used byte length
		return len;
	}

	size_t UString::_utf32_to_utf8(const unicode_char& in_uc, unsigned char out_cp[6])
	{
		size_t len = _utf8_char_length(in_uc); // predict byte length of sequence
		unicode_char c = in_uc; // copy to temp buffer

		//stuff all of the lower bits
		for (size_t i = len - 1; i > 0; i--)
		{
			out_cp[i] = static_cast<unsigned char>(((c)&_cont_mask) | _cont);
			c >>= 6;
		}

		//now write the header byte
		switch (len)
		{
		case 6: out_cp[0] = static_cast<unsigned char>(((c)&_lead5_mask) | _lead5); break;
		case 5: out_cp[0] = static_cast<unsigned char>(((c)&_lead4_mask) | _lead4); break;
		case 4: out_cp[0] = static_cast<unsigned char>(((c)&_lead3_mask) | _lead3); break;
		case 3: out_cp[0] = static_cast<unsigned char>(((c)&_lead2_mask) | _lead2); break;
		case 2: out_cp[0] = static_cast<unsigned char>(((c)&_lead1_mask) | _lead1); break;
		case 1:
		default: out_cp[0] = static_cast<unsigned char>((c)&0x7F); break;
		}

		// return the byte length of the sequence
		return len;
	}

	UString::size_type UString::_verifyUTF8(const unsigned char* c_str)
	{
		std::string_view tmp(reinterpret_cast<const char*>(c_str));
		return _verifyUTF8(tmp);
	}

	UString::size_type UString::_verifyUTF8(const char* c_str, size_type num)
	{
		size_type length = 0;

		for (size_type i = 0; i < num; ++i)
		{
			// characters pass until we find an extended sequence
			if (c_str[i] & 0x80)
			{
				if (i + 1 >= num) // invalid extended sequence
					return num;

				unsigned char c = c_str[i];
				size_t contBytes = 0;

				// get continuation byte count and test for overlong sequences
				if ((c & ~_lead1_mask) == _lead1)
				{ // 1 additional byte
					if (c == _lead1)
					{
						//throw invalid_data( "overlong UTF-8 sequence" );
						return num;
					}
					contBytes = 1;
				}
				else if ((c & ~_lead2_mask) == _lead2)
				{ // 2 additional bytes
					contBytes = 2;
					if (c == _lead2)
					{ // possible overlong UTF-8 sequence
						c = c_str[i + 1]; // look ahead to next byte in sequence
						if ((c & _lead2) == _cont)
						{
							//throw invalid_data( "overlong UTF-8 sequence" );
							return num;
						}
					}
				}
				else if ((c & ~_lead3_mask) == _lead3)
				{ // 3 additional bytes
					contBytes = 3;
					if (c == _lead3)
					{ // possible overlong UTF-8 sequence
						c = c_str[i + 1]; // look ahead to next byte in sequence
						if ((c & _lead3) == _cont)
						{
							//throw invalid_data( "overlong UTF-8 sequence" );
							return num;
						}
					}
				}
				else if ((c & ~_lead4_mask) == _lead4)
				{ // 4 additional bytes
					contBytes = 4;
					if (c == _lead4)
					{ // possible overlong UTF-8 sequence
						c = c_str[i + 1]; // look ahead to next byte in sequence
						if ((c & _lead4) == _cont)
						{
							//throw invalid_data( "overlong UTF-8 sequence" );
							return num;
						}
					}
				}
				else if ((c & ~_lead5_mask) == _lead5)
				{ // 5 additional bytes
					contBytes = 5;
					if (c == _lead5)
					{ // possible overlong UTF-8 sequence
						c = c_str[i + 1]; // look ahead to next byte in sequence
						if ((c & _lead5) == _cont)
						{
							//throw invalid_data( "overlong UTF-8 sequence" );
							return num;
						}
					}
				}
				if (i + contBytes >= num) // invalid extended sequence
					return num;
				// check remaining continuation bytes for
				while (contBytes--)
				{
					c = c_str[++i]; // get next byte in sequence
					if ((c & ~_cont_mask) != _cont)
					{
						//throw invalid_data( "bad UTF-8 continuation byte" );
						return num;
					}
				}
			}
			length++;
		}
		return length;
	}

	void UString::_init()
	{
		m_buffer.mVoidBuffer = nullptr;
		m_bufferType = bt_none;
		m_bufferSize = 0;
	}

	void UString::_cleanBuffer() const
	{
		if (m_buffer.mVoidBuffer != nullptr)
		{
			switch (m_bufferType)
			{
			case bt_string: delete m_buffer.mStrBuffer; break;
			case bt_wstring: delete m_buffer.mWStrBuffer; break;
			case bt_utf32string: delete m_buffer.mUTF32StrBuffer; break;
			case bt_none: // under the worse of circumstances, this is all we can do, and hope it works out
				//delete m_buffer.mVoidBuffer;
				// delete void* is undefined, don't do that
				static_assert("This should never happen - mVoidBuffer should never contain something if we "
							  "don't know the type");
				break;
			}
			m_buffer.mVoidBuffer = nullptr;
			m_bufferSize = 0;
			m_bufferType = bt_none;
		}
	}

	void UString::_getBufferStr() const
	{
		if (m_bufferType != bt_string)
		{
			_cleanBuffer();
			m_buffer.mStrBuffer = new std::string();
			m_bufferType = bt_string;
		}
		m_buffer.mStrBuffer->clear();
	}

	void UString::_getBufferWStr() const
	{
		if (m_bufferType != bt_wstring)
		{
			_cleanBuffer();
			m_buffer.mWStrBuffer = new std::wstring();
			m_bufferType = bt_wstring;
		}
		m_buffer.mWStrBuffer->clear();
	}

	void UString::_getBufferUTF32Str() const
	{
		if (m_bufferType != bt_utf32string)
		{
			_cleanBuffer();
			m_buffer.mUTF32StrBuffer = new utf32string();
			m_bufferType = bt_utf32string;
		}
		m_buffer.mUTF32StrBuffer->clear();
	}

	void UString::_load_buffer_UTF8() const
	{
		_getBufferStr();
		std::string& buffer = (*m_buffer.mStrBuffer);
		buffer.reserve(length());

		unsigned char utf8buf[6];
		char* charbuf = (char*)utf8buf;
		unicode_char c;
		size_t len;

		const_iterator i;
		const_iterator ie = end();
		for (i = begin(); i != ie; i.moveNext())
		{
			c = i.getCharacter();
			len = _utf32_to_utf8(c, utf8buf);
			size_t j = 0;
			while (j < len)
				buffer.push_back(charbuf[j++]);
		}
	}

	void UString::_load_buffer_WStr() const
	{
		_getBufferWStr();
		std::wstring& buffer = (*m_buffer.mWStrBuffer);
		buffer.reserve(length()); // may over reserve, but should be close enough
#ifdef WCHAR_UTF16 // wchar_t matches UTF-16
		const_iterator i, ie = end();
		for (i = begin(); i != ie; ++i)
		{
			buffer.push_back((wchar_t)(*i));
		}
#else // wchar_t fits UTF-32
		unicode_char c;
		const_iterator i;
		const_iterator ie = end();
		for (i = begin(); i != ie; i.moveNext())
		{
			c = i.getCharacter();
			buffer.push_back((wchar_t)c);
		}
#endif
	}

	void UString::_load_buffer_UTF32() const
	{
		_getBufferUTF32Str();
		utf32string& buffer = (*m_buffer.mUTF32StrBuffer);
		buffer.reserve(length()); // may over reserve, but should be close enough

		unicode_char c;

		const_iterator i;
		const_iterator ie = end();
		for (i = begin(); i != ie; i.moveNext())
		{
			c = i.getCharacter();
			buffer.push_back(c);
		}
	}

} // namespace MyGUI
